Concepts taken from Advanced R.

Assignment operator and pipes

# At top level, `=` and `<-` both create a variable.
y = 1:10
y
##  [1]  1  2  3  4  5  6  7  8  9 10
z <- 1:10
z
##  [1]  1  2  3  4  5  6  7  8  9 10
# Inside a call, `=` only names the argument `x` of mean(); no variable `x`
# is created, so looking it up afterwards fails.
mean(x = 1:10)
## [1] 5.5
x
## Error in eval(expr, envir, enclos): object 'x' not found
# `<-` inside a call is a real assignment: `x` exists afterwards and its value
# is what mean() receives.
mean(x <- 1:10)
## [1] 5.5
x
##  [1]  1  2  3  4  5  6  7  8  9 10
# Here `x = ...` is read as an argument named `x`, which system.time() rejects
# as unused.
system.time(x = lapply(1:10, function(x) {Sys.sleep(1); return(x)}))
## Error in system.time(x = lapply(1:10, function(x) {: unused argument (x = lapply(1:10, function(x) {
##     Sys.sleep(1)
##     return(x)
## }))
# With `<-` the whole assignment is the timed expression; ten one-second
# sleeps account for the ~10 s of elapsed time.
system.time(x <- lapply(1:10, function(x) {Sys.sleep(1); return(x)}))
##    user  system elapsed 
##   0.017   0.001  10.043
# Load the COVID-19 table; str() below shows that Date is read as character.
covid <- read.csv("../../Data/covid19.csv")

str(covid)
## 'data.frame':    231264 obs. of  8 variables:
##  $ Country.Region: chr  "Afghanistan" "Afghanistan" "Afghanistan" "Afghanistan" ...
##  $ Province.State: chr  "" "" "" "" ...
##  $ Lat           : num  33.9 33.9 33.9 33.9 33.9 ...
##  $ Long          : num  67.7 67.7 67.7 67.7 67.7 ...
##  $ Date          : chr  "2020-01-22" "2020-01-23" "2020-01-24" "2020-01-25" ...
##  $ Confirmed     : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ Recovered     : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ Deaths        : int  0 0 0 0 0 0 0 0 0 0 ...
# Without pipes: dates on which Switzerland's Confirmed count equals its
# maximum. The Swiss row filter has to be written twice, once for the rows and
# once inside max().
covid[covid$Country.Region == "Switzerland" & covid$Confirmed == max(covid[covid$Country.Region == "Switzerland", "Confirmed"]),"Date"]
## [1] "2022-06-14" "2022-06-15"
# Same query with the native pipe: the second subset() only sees the Swiss
# rows, so max(Confirmed) is the Swiss maximum. getElement() extracts the Date
# column as a plain vector.
covid |> 
    subset(Country.Region == "Switzerland") |> 
    subset(Confirmed == max(Confirmed)) |> 
    getElement("Date")
## [1] "2022-06-14" "2022-06-15"

Same with tidyverse.

library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✔ ggplot2 3.3.6     ✔ purrr   0.3.4
## ✔ tibble  3.1.7     ✔ dplyr   1.0.9
## ✔ tidyr   1.2.0     ✔ stringr 1.4.0
## ✔ readr   2.1.2     ✔ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
# Same query with the magrittr pipe and dplyr verbs. The second filter() only
# sees the Swiss rows; select() returns a one-column data frame rather than a
# vector.
covid %>%
    filter(Country.Region == "Switzerland") %>%
    filter(Confirmed == max(Confirmed)) %>%
    select("Date")
##         Date
## 1 2022-06-14
## 2 2022-06-15
# Right-assignment: `->` at the end of the pipeline stores the result of the
# whole chain in max_conf_date.
covid %>%
    filter(Country.Region == "Switzerland") %>%
    filter(Confirmed == max(Confirmed)) %>%
    select("Date") -> max_conf_date
max_conf_date
##         Date
## 1 2022-06-14
## 2 2022-06-15
# Line chart of confirmed cases in Switzerland over time.
# Date is read from the CSV as character; it is converted to Date here so that
# ggplot2 draws a continuous date axis instead of one discrete tick label per
# day.
covid %>%
    filter(Country.Region == "Switzerland") %>%
    mutate(Date = as.Date(Date)) %>%
    ggplot() +
    geom_line(aes(Date, Confirmed, color = Country.Region, group = 1)) +
    theme_classic(base_size = 15) +
    # Rotate the x-axis labels by 90 degrees.
    theme(axis.text.x = element_text(angle = 90))

Efficient coding

Reading in data

Write large data frame

# Draw 10 million standard-normal values, shape them into 10 columns and write
# the resulting data frame to a CSV file.
rnorm(n = 1e7) %>%
    matrix(ncol = 10) %>%
    as.data.frame() %>%
    write_csv(file = "../../Data/large_test_data.csv")

read.csv vs read_csv vs data.table vs vroom

library(bench)
library(data.table)
library(vroom)
library(DT)

# Benchmark four CSV readers on the same file and show the result as an
# interactive DT table.
# check = FALSE switches off bench's comparison of the expressions' results
# (NOTE(review): presumably needed because each reader returns a different
# class - confirm).
# NOTE(review): `cur_data` is used further down in the file; presumably it is
# whatever these assignments leave behind - confirm which reader's result that
# is.
bench::mark(
    cur_data <- read.csv("../../Data/large_test_data.csv"),
    cur_data <- readr::read_csv("../../Data/large_test_data.csv"),
    cur_data <- data.table::fread("../../Data/large_test_data.csv"),
    cur_data <- vroom::vroom("../../Data/large_test_data.csv"),
    check = FALSE
) %>% DT::datatable()
## Warning: Some expressions had a GC in every iteration; so filtering is disabled.
## Warning in instance$preRenderHook(instance): It seems your data is too big
## for client-side DataTables. You may consider server-side processing: https://
## rstudio.github.io/DT/server.html

Loops and apply

Bad

# Keep the first 100,000 rows and convert them to a matrix.
cur_data <- as.matrix(cur_data[1:100000,])

# Deliberately inefficient: `out` starts empty and is extended by one element
# per iteration, so c() has to build a new, longer vector every time.
out <- vector()

# seq_len() rather than 1:nrow(): for a matrix with zero rows, 1:0 would still
# run the body for i = 1 and i = 0.
for (i in seq_len(nrow(cur_data))) {
    cur_mean <- mean(cur_data[i,])
    out <- c(out, cur_mean)
}

head(out, 100)
##   [1] -0.074207119  0.233431346  0.104103532 -0.518205623 -0.301064250
##   [6] -1.127165639 -0.150435722 -0.339594341 -0.081580333 -0.244686629
##  [11] -0.086633220 -0.032881760  0.138429943  0.067167798 -0.136472178
##  [16]  0.012583186 -0.318490843 -0.408403583 -0.400436714 -0.321152811
##  [21]  0.014438649  0.018794623  0.856228533  0.151950344  0.427907691
##  [26]  0.074230011 -0.005357597  0.176879299 -0.012597546 -0.414030659
##  [31] -0.374366404  0.276757738 -0.302159611 -0.277743381  0.406273566
##  [36]  0.410958393 -0.152828638  0.066117703 -0.392008452  0.434231416
##  [41] -0.083261396 -0.200989223  0.067177543  0.530071588  0.387739119
##  [46] -0.718724171 -0.136835433  0.063941132  0.195991346  0.111075637
##  [51] -0.280771690  0.127466606 -0.162754149  0.632138537 -0.372494478
##  [56]  0.108856251 -0.142078802 -0.300231957 -0.495349114 -0.097214440
##  [61]  0.138398738 -0.150945784  0.227089967  0.575571188 -0.141800980
##  [66]  0.106155740 -0.198169090 -0.376608391  0.097445763  0.058788789
##  [71] -0.461580999 -0.213832622  0.077024249  0.622917701  0.015589750
##  [76]  0.431167475 -0.068877270 -0.165183311  0.152648936  0.108396141
##  [81]  0.033924549 -0.223445850 -0.173617365 -0.098205042 -0.059709630
##  [86] -0.162718309  0.201106567 -0.342937053 -0.161730928 -0.274512559
##  [91] -0.015296974  0.369567604 -0.584568395  0.253261623 -0.071557989
##  [96]  0.108757421  0.145894200 -0.290742230  0.043156695 -0.440033909

Better

# apply() with MARGIN = 1 calls mean() once per row and returns all results in
# one vector, so nothing has to be grown by hand.
out <- apply(cur_data, MARGIN = 1, FUN = mean)
head(out, 100)
##   [1] -0.074207119  0.233431346  0.104103532 -0.518205623 -0.301064250
##   [6] -1.127165639 -0.150435722 -0.339594341 -0.081580333 -0.244686629
##  [11] -0.086633220 -0.032881760  0.138429943  0.067167798 -0.136472178
##  [16]  0.012583186 -0.318490843 -0.408403583 -0.400436714 -0.321152811
##  [21]  0.014438649  0.018794623  0.856228533  0.151950344  0.427907691
##  [26]  0.074230011 -0.005357597  0.176879299 -0.012597546 -0.414030659
##  [31] -0.374366404  0.276757738 -0.302159611 -0.277743381  0.406273566
##  [36]  0.410958393 -0.152828638  0.066117703 -0.392008452  0.434231416
##  [41] -0.083261396 -0.200989223  0.067177543  0.530071588  0.387739119
##  [46] -0.718724171 -0.136835433  0.063941132  0.195991346  0.111075637
##  [51] -0.280771690  0.127466606 -0.162754149  0.632138537 -0.372494478
##  [56]  0.108856251 -0.142078802 -0.300231957 -0.495349114 -0.097214440
##  [61]  0.138398738 -0.150945784  0.227089967  0.575571188 -0.141800980
##  [66]  0.106155740 -0.198169090 -0.376608391  0.097445763  0.058788789
##  [71] -0.461580999 -0.213832622  0.077024249  0.622917701  0.015589750
##  [76]  0.431167475 -0.068877270 -0.165183311  0.152648936  0.108396141
##  [81]  0.033924549 -0.223445850 -0.173617365 -0.098205042 -0.059709630
##  [86] -0.162718309  0.201106567 -0.342937053 -0.161730928 -0.274512559
##  [91] -0.015296974  0.369567604 -0.584568395  0.253261623 -0.071557989
##  [96]  0.108757421  0.145894200 -0.290742230  0.043156695 -0.440033909

Best

# rowMeans() is the dedicated base R function for this task: a single call on
# the whole matrix, with no per-row R function call.
out <- rowMeans(cur_data)
head(out, 100)
##   [1] -0.074207119  0.233431346  0.104103532 -0.518205623 -0.301064250
##   [6] -1.127165639 -0.150435722 -0.339594341 -0.081580333 -0.244686629
##  [11] -0.086633220 -0.032881760  0.138429943  0.067167798 -0.136472178
##  [16]  0.012583186 -0.318490843 -0.408403583 -0.400436714 -0.321152811
##  [21]  0.014438649  0.018794623  0.856228533  0.151950344  0.427907691
##  [26]  0.074230011 -0.005357597  0.176879299 -0.012597546 -0.414030659
##  [31] -0.374366404  0.276757738 -0.302159611 -0.277743381  0.406273566
##  [36]  0.410958393 -0.152828638  0.066117703 -0.392008452  0.434231416
##  [41] -0.083261396 -0.200989223  0.067177543  0.530071588  0.387739119
##  [46] -0.718724171 -0.136835433  0.063941132  0.195991346  0.111075637
##  [51] -0.280771690  0.127466606 -0.162754149  0.632138537 -0.372494478
##  [56]  0.108856251 -0.142078802 -0.300231957 -0.495349114 -0.097214440
##  [61]  0.138398738 -0.150945784  0.227089967  0.575571188 -0.141800980
##  [66]  0.106155740 -0.198169090 -0.376608391  0.097445763  0.058788789
##  [71] -0.461580999 -0.213832622  0.077024249  0.622917701  0.015589750
##  [76]  0.431167475 -0.068877270 -0.165183311  0.152648936  0.108396141
##  [81]  0.033924549 -0.223445850 -0.173617365 -0.098205042 -0.059709630
##  [86] -0.162718309  0.201106567 -0.342937053 -0.161730928 -0.274512559
##  [91] -0.015296974  0.369567604 -0.584568395  0.253261623 -0.071557989
##  [96]  0.108757421  0.145894200 -0.290742230  0.043156695 -0.440033909

Different apply functions:

  • apply
  • lapply
  • sapply
  • vapply
  • tapply
  • mapply
# Build a list of 10 numeric vectors; each one holds between 1 and 10
# standard-normal draws.
cur_list <- lapply(seq_len(10), function(idx) {
    vec_length <- sample(1:10, 1)
    rnorm(n = vec_length)
})

# lapply() always returns a list: one mean per element of cur_list.
out <- lapply(X = cur_list, FUN = mean)
out
## [[1]]
## [1] 0.1063153
## 
## [[2]]
## [1] 0.3372581
## 
## [[3]]
## [1] -0.00760443
## 
## [[4]]
## [1] -0.5655898
## 
## [[5]]
## [1] 1.116056
## 
## [[6]]
## [1] -0.09989607
## 
## [[7]]
## [1] 0.5124912
## 
## [[8]]
## [1] 0.2324609
## 
## [[9]]
## [1] 0.544829
## 
## [[10]]
## [1] 0.02257419
# Flatten the list of means into one numeric vector by calling c() with the
# list elements as its arguments.
do.call(c, out)
##  [1]  0.10631534  0.33725808 -0.00760443 -0.56558980  1.11605635 -0.09989607
##  [7]  0.51249119  0.23246090  0.54482903  0.02257419
x <- list(entr1 = 1:10, entr2 = 20:30)

# Operators are functions too: `[[` with the extra argument 3 extracts the
# third value of every list entry.
lapply(x, `[[`, 3)
## $entr1
## [1] 3
## 
## $entr2
## [1] 22
# The replacement form `[[<-` with arguments 3 and 120 returns each entry with
# its third value set to 120.
lapply(x, `[[<-`, 3, 120)
## $entr1
##  [1]   1   2 120   4   5   6   7   8   9  10
## 
## $entr2
##  [1]  20  21 120  23  24  25  26  27  28  29  30
# sapply() simplifies the per-element results to a vector.
sapply(cur_list, mean)
##  [1]  0.10631534  0.33725808 -0.00760443 -0.56558980  1.11605635 -0.09989607
##  [7]  0.51249119  0.23246090  0.54482903  0.02257419
# vapply() additionally checks every result against the template FUN.VALUE;
# 0 declares "one double per element".
vapply(cur_list, mean, FUN.VALUE = 0)
##  [1]  0.10631534  0.33725808 -0.00760443 -0.56558980  1.11605635 -0.09989607
##  [7]  0.51249119  0.23246090  0.54482903  0.02257419
# A character template does not match the double returned by mean(), so
# vapply() stops with an error instead of returning an unexpected type.
vapply(cur_list, mean, FUN.VALUE = "test")
## Error in vapply(cur_list, mean, FUN.VALUE = "test"): values must be type 'character',
##  but FUN(X[[1]]) result is type 'double'
# Maximum confirmed count per country, returned as a named vector.
with(covid, tapply(X = Confirmed, INDEX = Country.Region, FUN = max))
##                      Afghanistan                          Albania 
##                           181236                           276821 
##                          Algeria                          Andorra 
##                           265952                            43449 
##                           Angola                       Antarctica 
##                            99761                               11 
##              Antigua and Barbuda                        Argentina 
##                             8537                          9313453 
##                          Armenia                        Australia 
##                           423044                          2688494 
##                          Austria                       Azerbaijan 
##                          4335007                           792785 
##                          Bahamas                          Bahrain 
##                            35464                           599924 
##                       Bangladesh                         Barbados 
##                          1954637                            82545 
##                          Belarus                          Belgium 
##                           982867                          4177440 
##                           Belize                            Benin 
##                            61150                            26952 
##                           Bhutan                          Bolivia 
##                            59644                           913874 
##           Bosnia and Herzegovina                         Botswana 
##                           378168                           314242 
##                           Brazil                           Brunei 
##                         31611769                           154133 
##                         Bulgaria                     Burkina Faso 
##                          1167466                            20899 
##                            Burma                          Burundi 
##                           613464                            42330 
##                       Cabo Verde                         Cambodia 
##                            57464                           136262 
##                         Cameroon         Central African Republic 
##                           119947                            14649 
##                             Chad                            Chile 
##                             7420                          3844668 
##                            China                         Colombia 
##                          1221808                          6117847 
##                          Comoros              Congo (Brazzaville) 
##                             8131                            24128 
##                 Congo (Kinshasa)                       Costa Rica 
##                            89932                           904934 
##                    Cote d'Ivoire                          Croatia 
##                            82580                          1140625 
##                             Cuba                           Cyprus 
##                          1105664                           493984 
##                          Czechia                          Denmark 
##                          3924356                          3148183 
##                 Diamond Princess                         Djibouti 
##                              712                            15690 
##                         Dominica               Dominican Republic 
##                            14717                           593324 
##                          Ecuador                            Egypt 
##                           892176                           515645 
##                      El Salvador                Equatorial Guinea 
##                           164134                            16001 
##                          Eritrea                          Estonia 
##                             9773                           578494 
##                         Eswatini                         Ethiopia 
##                            72957                           481225 
##                             Fiji                          Finland 
##                            65217                          1114573 
##                           France                            Gabon 
##                         29133309                            47711 
##                           Gambia                          Georgia 
##                            12002                          1658755 
##                          Germany                            Ghana 
##                         27096571                           163191 
##                           Greece                          Grenada 
##                          3517898                            18115 
##                        Guatemala                           Guinea 
##                           874926                            36817 
##                    Guinea-Bissau                           Guyana 
##                             8307                            66129 
##                            Haiti                         Holy See 
##                            31054                               29 
##                         Honduras                          Hungary 
##                           425655                          1923122 
##                          Iceland                            India 
##                           190643                         43257730 
##                        Indonesia                             Iran 
##                          6056017                          7234221 
##                             Iraq                          Ireland 
##                          2330735                          1578284 
##                           Israel                            Italy 
##                          4205416                         17736696 
##                          Jamaica                            Japan 
##                           140874                          9079462 
##                           Jordan                       Kazakhstan 
##                          1697673                          1395085 
##                            Kenya                         Kiribati 
##                           327892                             3215 
##                     Korea, North                     Korea, South 
##                                1                         18256457 
##                           Kosovo                           Kuwait 
##                           228465                           637470 
##                       Kyrgyzstan                             Laos 
##                           201016                           210173 
##                           Latvia                          Lebanon 
##                           831248                          1101508 
##                          Lesotho                          Liberia 
##                            33746                             7460 
##                            Libya                    Liechtenstein 
##                           502076                            17653 
##                        Lithuania                       Luxembourg 
##                          1064538                           253318 
##                       Madagascar                           Malawi 
##                            64685                            86146 
##                         Malaysia                         Maldives 
##                          4532632                           179979 
##                             Mali                            Malta 
##                            31134                            96763 
##                 Marshall Islands                       Mauritania 
##                               18                            59275 
##                        Mauritius                           Mexico 
##                           227442                          5843190 
##                       Micronesia                          Moldova 
##                               35                           519452 
##                           Monaco                         Mongolia 
##                            12550                           926282 
##                       Montenegro                          Morocco 
##                           238123                          1179474 
##                       MS Zaandam                          Namibia 
##                                9                           168591 
##                            Nepal                      Netherlands 
##                           979297                          8114146 
##                      New Zealand                        Nicaragua 
##                          1260441                            14619 
##                            Niger                          Nigeria 
##                             9031                           256404 
##                  North Macedonia                           Norway 
##                           312748                          1438244 
##                             Oman                         Pakistan 
##                           389758                          1531581 
##                            Palau                           Panama 
##                             5165                           896924 
##                 Papua New Guinea                         Paraguay 
##                            44675                           652044 
##                             Peru                      Philippines 
##                          3592765                          3694121 
##                           Poland                         Portugal 
##                          6010643                          5005783 
##                            Qatar                          Romania 
##                           373691                          2912705 
##                           Russia                           Rwanda 
##                         18116672                           130443 
##            Saint Kitts and Nevis                      Saint Lucia 
##                             5873                            26688 
## Saint Vincent and the Grenadines                            Samoa 
##                             9447                            14422 
##                       San Marino            Sao Tome and Principe 
##                            17376                             6010 
##                     Saudi Arabia                          Senegal 
##                           781168                            86186 
##                           Serbia                       Seychelles 
##                          2021264                            44397 
##                     Sierra Leone                        Singapore 
##                             7685                          1352681 
##                         Slovakia                         Slovenia 
##                          2545409                          1029415 
##                  Solomon Islands                          Somalia 
##                            21237                            26675 
##                     South Africa                      South Sudan 
##                          3981739                            17675 
##                            Spain                        Sri Lanka 
##                         12515127                           663965 
##                            Sudan             Summer Olympics 2020 
##                            62489                              865 
##                         Suriname                           Sweden 
##                            80766                          2510930 
##                      Switzerland                          Taiwan* 
##                          3701895                          3072432 
##                       Tajikistan                         Tanzania 
##                            17786                            35354 
##                         Thailand                             Togo 
##                          4492913                            37220 
##                            Tonga              Trinidad and Tobago 
##                            11909                           165144 
##                          Tunisia                           Turkey 
##                          1044426                         15085742 
##                           Uganda                          Ukraine 
##                           166497                          5040518 
##             United Arab Emirates                   United Kingdom 
##                           921566                         22447911 
##                          Uruguay                               US 
##                           943877                         85941735 
##                       Uzbekistan                          Vanuatu 
##                           239319                            10757 
##                        Venezuela                          Vietnam 
##                           524370                         10734151 
##               West Bank and Gaza             Winter Olympics 2022 
##                           658100                              535 
##                            Yemen                           Zambia 
##                            11822                           323654 
##                         Zimbabwe 
##                           254387
# Maximum confirmed count per country, returned as a data frame with one row
# per country.
with(covid, aggregate(Confirmed, by = list(country = Country.Region), FUN = max))
##                              country        x
## 1                        Afghanistan   181236
## 2                            Albania   276821
## 3                            Algeria   265952
## 4                            Andorra    43449
## 5                             Angola    99761
## 6                         Antarctica       11
## 7                Antigua and Barbuda     8537
## 8                          Argentina  9313453
## 9                            Armenia   423044
## 10                         Australia  2688494
## 11                           Austria  4335007
## 12                        Azerbaijan   792785
## 13                           Bahamas    35464
## 14                           Bahrain   599924
## 15                        Bangladesh  1954637
## 16                          Barbados    82545
## 17                           Belarus   982867
## 18                           Belgium  4177440
## 19                            Belize    61150
## 20                             Benin    26952
## 21                            Bhutan    59644
## 22                           Bolivia   913874
## 23            Bosnia and Herzegovina   378168
## 24                          Botswana   314242
## 25                            Brazil 31611769
## 26                            Brunei   154133
## 27                          Bulgaria  1167466
## 28                      Burkina Faso    20899
## 29                             Burma   613464
## 30                           Burundi    42330
## 31                        Cabo Verde    57464
## 32                          Cambodia   136262
## 33                          Cameroon   119947
## 34          Central African Republic    14649
## 35                              Chad     7420
## 36                             Chile  3844668
## 37                             China  1221808
## 38                          Colombia  6117847
## 39                           Comoros     8131
## 40               Congo (Brazzaville)    24128
## 41                  Congo (Kinshasa)    89932
## 42                        Costa Rica   904934
## 43                     Cote d'Ivoire    82580
## 44                           Croatia  1140625
## 45                              Cuba  1105664
## 46                            Cyprus   493984
## 47                           Czechia  3924356
## 48                           Denmark  3148183
## 49                  Diamond Princess      712
## 50                          Djibouti    15690
## 51                          Dominica    14717
## 52                Dominican Republic   593324
## 53                           Ecuador   892176
## 54                             Egypt   515645
## 55                       El Salvador   164134
## 56                 Equatorial Guinea    16001
## 57                           Eritrea     9773
## 58                           Estonia   578494
## 59                          Eswatini    72957
## 60                          Ethiopia   481225
## 61                              Fiji    65217
## 62                           Finland  1114573
## 63                            France 29133309
## 64                             Gabon    47711
## 65                            Gambia    12002
## 66                           Georgia  1658755
## 67                           Germany 27096571
## 68                             Ghana   163191
## 69                            Greece  3517898
## 70                           Grenada    18115
## 71                         Guatemala   874926
## 72                            Guinea    36817
## 73                     Guinea-Bissau     8307
## 74                            Guyana    66129
## 75                             Haiti    31054
## 76                          Holy See       29
## 77                          Honduras   425655
## 78                           Hungary  1923122
## 79                           Iceland   190643
## 80                             India 43257730
## 81                         Indonesia  6056017
## 82                              Iran  7234221
## 83                              Iraq  2330735
## 84                           Ireland  1578284
## 85                            Israel  4205416
## 86                             Italy 17736696
## 87                           Jamaica   140874
## 88                             Japan  9079462
## 89                            Jordan  1697673
## 90                        Kazakhstan  1395085
## 91                             Kenya   327892
## 92                          Kiribati     3215
## 93                      Korea, North        1
## 94                      Korea, South 18256457
## 95                            Kosovo   228465
## 96                            Kuwait   637470
## 97                        Kyrgyzstan   201016
## 98                              Laos   210173
## 99                            Latvia   831248
## 100                          Lebanon  1101508
## 101                          Lesotho    33746
## 102                          Liberia     7460
## 103                            Libya   502076
## 104                    Liechtenstein    17653
## 105                        Lithuania  1064538
## 106                       Luxembourg   253318
## 107                       Madagascar    64685
## 108                           Malawi    86146
## 109                         Malaysia  4532632
## 110                         Maldives   179979
## 111                             Mali    31134
## 112                            Malta    96763
## 113                 Marshall Islands       18
## 114                       Mauritania    59275
## 115                        Mauritius   227442
## 116                           Mexico  5843190
## 117                       Micronesia       35
## 118                          Moldova   519452
## 119                           Monaco    12550
## 120                         Mongolia   926282
## 121                       Montenegro   238123
## 122                          Morocco  1179474
## 123                       MS Zaandam        9
## 124                          Namibia   168591
## 125                            Nepal   979297
## 126                      Netherlands  8114146
## 127                      New Zealand  1260441
## 128                        Nicaragua    14619
## 129                            Niger     9031
## 130                          Nigeria   256404
## 131                  North Macedonia   312748
## 132                           Norway  1438244
## 133                             Oman   389758
## 134                         Pakistan  1531581
## 135                            Palau     5165
## 136                           Panama   896924
## 137                 Papua New Guinea    44675
## 138                         Paraguay   652044
## 139                             Peru  3592765
## 140                      Philippines  3694121
## 141                           Poland  6010643
## 142                         Portugal  5005783
## 143                            Qatar   373691
## 144                          Romania  2912705
## 145                           Russia 18116672
## 146                           Rwanda   130443
## 147            Saint Kitts and Nevis     5873
## 148                      Saint Lucia    26688
## 149 Saint Vincent and the Grenadines     9447
## 150                            Samoa    14422
## 151                       San Marino    17376
## 152            Sao Tome and Principe     6010
## 153                     Saudi Arabia   781168
## 154                          Senegal    86186
## 155                           Serbia  2021264
## 156                       Seychelles    44397
## 157                     Sierra Leone     7685
## 158                        Singapore  1352681
## 159                         Slovakia  2545409
## 160                         Slovenia  1029415
## 161                  Solomon Islands    21237
## 162                          Somalia    26675
## 163                     South Africa  3981739
## 164                      South Sudan    17675
## 165                            Spain 12515127
## 166                        Sri Lanka   663965
## 167                            Sudan    62489
## 168             Summer Olympics 2020      865
## 169                         Suriname    80766
## 170                           Sweden  2510930
## 171                      Switzerland  3701895
## 172                          Taiwan*  3072432
## 173                       Tajikistan    17786
## 174                         Tanzania    35354
## 175                         Thailand  4492913
## 176                             Togo    37220
## 177                            Tonga    11909
## 178              Trinidad and Tobago   165144
## 179                          Tunisia  1044426
## 180                           Turkey 15085742
## 181                           Uganda   166497
## 182                          Ukraine  5040518
## 183             United Arab Emirates   921566
## 184                   United Kingdom 22447911
## 185                          Uruguay   943877
## 186                               US 85941735
## 187                       Uzbekistan   239319
## 188                          Vanuatu    10757
## 189                        Venezuela   524370
## 190                          Vietnam 10734151
## 191               West Bank and Gaza   658100
## 192             Winter Olympics 2022      535
## 193                            Yemen    11822
## 194                           Zambia   323654
## 195                         Zimbabwe   254387
covid %>%
    group_by(Country.Region) %>%
    summarize(max_confirmed = max(Confirmed))
## # A tibble: 195 × 2
##    Country.Region      max_confirmed
##    <chr>                       <int>
##  1 Afghanistan                181236
##  2 Albania                    276821
##  3 Algeria                    265952
##  4 Andorra                     43449
##  5 Angola                      99761
##  6 Antarctica                     11
##  7 Antigua and Barbuda          8537
##  8 Argentina                 9313453
##  9 Armenia                    423044
## 10 Australia                 2688494
## # … with 185 more rows
cur_list_2 <- as.list(1:10)
mapply(function(x, y){
    return(mean(x) * y)
},
cur_list, cur_list_2)
##  [1]  0.10631534  0.67451615 -0.02281329 -2.26235922  5.58028174 -0.59937639
##  [7]  3.58743832  1.85968720  4.90346127  0.22574189
mapply(`*`, cur_list_2, cur_list_2)
##  [1]   1   4   9  16  25  36  49  64  81 100

Parallelisation

BiocParallel

Lots of packages: [parallel], [foreach], [future]

I recommend [BiocParallel]

library(BiocParallel)

bench::mark(
    cur_out <- lapply(1:10, function(x){Sys.sleep(5); return(x)}),
    cur_out <- bplapply(1:10, function(x){Sys.sleep(5); return(x)}, BPPARAM = bpparam()),
    memory = FALSE
)
## # A tibble: 2 × 6
## # … with 6 more variables: expression <bch:expr>, min <bch:tm>,
## #   median <bch:tm>, `itr/sec` <dbl>, mem_alloc <bch:byt>, `gc/sec` <dbl>

Object-oriented coding

Mainly relevant for development rather than analysis, but good to understand.

attr(1:10, "class")
## NULL
typeof(1:10)
## [1] "integer"
class(1:10)
## [1] "integer"
is(1:10, "integer")
## [1] TRUE
attr(matrix(1:10), "class")
## NULL
typeof(matrix(1:10))
## [1] "integer"
class(matrix(1:10))
## [1] "matrix" "array"
is(matrix(1:10), "integer")
## [1] FALSE
# Class inheritance
is(matrix(1:10), "matrix")
## [1] TRUE
is(matrix(1:10), "array")
## [1] TRUE
cur_pca <- prcomp(iris[,-5])

class(cur_pca)
## [1] "prcomp"
print(cur_pca)
## Standard deviations (1, .., p=4):
## [1] 2.0562689 0.4926162 0.2796596 0.1543862
## 
## Rotation (n x k) = (4 x 4):
##                      PC1         PC2         PC3        PC4
## Sepal.Length  0.36138659 -0.65658877  0.58202985  0.3154872
## Sepal.Width  -0.08452251 -0.73016143 -0.59791083 -0.3197231
## Petal.Length  0.85667061  0.17337266 -0.07623608 -0.4798390
## Petal.Width   0.35828920  0.07548102 -0.54583143  0.7536574
plot(cur_pca)

# Access list elements via "$" (a prcomp object is an S3 list; "slots" are an S4 concept)
head(cur_pca$sdev)
## [1] 2.0562689 0.4926162 0.2796596 0.1543862
f <- factor(c("a", "b", "c"))
df <- data.frame(a = 1:10, b = letters[1:10])

# generic functions - always use these
print(f)
## [1] a b c
## Levels: a b c
print(df)
##     a b
## 1   1 a
## 2   2 b
## 3   3 c
## 4   4 d
## 5   5 e
## 6   6 f
## 7   7 g
## 8   8 h
## 9   9 i
## 10 10 j
# methods - never use these
print.factor(f)
## [1] a b c
## Levels: a b c
print.factor(df)
##                                                   a 
##                                                1:10 
##                                                   b 
## c("a", "b", "c", "d", "e", "f", "g", "h", "i", "j") 
## attr(,"row.names")
##  [1]  1  2  3  4  5  6  7  8  9 10
## Levels:
## Warning in print.factor(df): factor levels must be "character"
print.data.frame(df)
##     a b
## 1   1 a
## 2   2 b
## 3   3 c
## 4   4 d
## 5   5 e
## 6   6 f
## 7   7 g
## 8   8 h
## 9   9 i
## 10 10 j
print.data.frame(f)
## NULL
## <0 rows> (or 0-length row.names)

S3 class

# Create, then set class
x <- 1
class(x) <- "my_class"

class(x)
## [1] "my_class"
inherits(x, "my_class")
## [1] TRUE
inherits(x, "your_class")
## [1] FALSE
# build a generic function

S3 generic

# S3 generic: hands the call over to the method matching the class of `x`.
# Signals an error when no applicable method exists for that class.
my_generic <- function(x) {
  UseMethod("my_generic", x)
}

S3 method

# S3 method of `my_generic` for objects of class "my_class": adds 10 to `x`.
# Arithmetic keeps the attributes of `x`, so the result still carries the
# "my_class" class attribute.
my_generic.my_class <- function(x) {
  x + 10
}

my_generic(x)
## [1] 11
## attr(,"class")
## [1] "my_class"
my_generic(20)
## Error in UseMethod("my_generic", x): no applicable method for 'my_generic' applied to an object of class "c('double', 'numeric')"

S4 class

# Formal S4 class "Person" with two typed slots:
#   name - character
#   age  - numeric
setClass(
  "Person",
  slots = list(name = "character", age = "numeric")
)

john <- new("Person", name = "John Smith", age = as.numeric(NA))

is(john)
## [1] "Person"
# Access slots via "@"
john@name
## [1] "John Smith"
slot(john, "age")
## [1] NA

S4 generic

setGeneric("age", function(x) standardGeneric("age"))
## [1] "age"
setGeneric("age<-", function(x, value) standardGeneric("age<-"))
## [1] "age<-"

S4 method

# Getter: return the value stored in the `age` slot of a Person.
setMethod("age", "Person", function(x) x@age)

# Setter (replacement method) backing `age(obj) <- value`.
# Stores `value` in the `age` slot and returns the modified object, which is
# what R assigns back to `obj`.
setMethod("age<-", "Person", function(x, value) {
  x@age <- value
  # Re-validate the whole object so that any validity rule defined for the
  # class (or a subclass) cannot be bypassed through the setter.
  validObject(x)
  x
})

age(john) <- 50
age(john)
## [1] 50

R6

library(R6)

# R6 class describing a person with a name and an optional numeric age.
#
# Fields:
#   name - single character string
#   age  - single numeric value; numeric NA when unknown
# Methods:
#   initialize(name, age) - validates and stores both fields
#   print(...)            - prints name and age, returns the object invisibly
#   add_years(x)          - adds `x` to the age in place, returns the object
#                           invisibly so calls can be chained
Person <- R6Class("Person", list(
  name = NULL,
  # NA_real_ (not the logical `NA`) so the default passes the is.numeric()
  # check in initialize(); with plain `NA`, Person$new("x") would error.
  age = NA_real_,
  initialize = function(name, age = NA_real_) {
    stopifnot(is.character(name), length(name) == 1)
    stopifnot(is.numeric(age), length(age) == 1)
    
    self$name <- name
    self$age <- age
  },
  print = function(...) {
    cat("Person: \n")
    cat("  Name: ", self$name, "\n", sep = "")
    cat("  Age:  ", self$age, "\n", sep = "")
    invisible(self)
  },
  add_years = function(x) {
    # R6 objects have reference semantics: this modifies the object itself.
    self$age <- self$age + x 
    invisible(self)
  }
))

nils <- Person$new("Nils", age = "thirty-three")
## Error in initialize(...): is.numeric(age) is not TRUE
nils <- Person$new("Nils", age = 33)

nils$print()
## Person: 
##   Name: Nils
##   Age:  33
nils$add_years(10)
nils$print()
## Person: 
##   Name: Nils
##   Age:  43
nils$age
## [1] 43
nils$
    add_years(10)$
    age
## [1] 53

Common pitfalls

Vector recycling

dim(iris)
## [1] 150   5
# Pitfall: the logical index has length 3 but iris has 150 rows, so R silently
# recycles it (TRUE, FALSE, TRUE, TRUE, FALSE, TRUE, ...) and keeps 100 rows
# instead of raising an error.
cur_vector <- c(TRUE, FALSE, TRUE)
dim(iris[cur_vector,])
## [1] 100   5
cur_vector <- iris$Species == "virginica"
dim(iris[cur_vector,])
## [1] 50  5

the drop argument

iris$subspecies <- factor(rep(letters[1:5], each = 30))

iris %>% count(Species, subspecies) 
##      Species subspecies  n
## 1     setosa          a 30
## 2     setosa          b 20
## 3 versicolor          b 10
## 4 versicolor          c 30
## 5 versicolor          d 10
## 6  virginica          d 20
## 7  virginica          e 30
iris %>% count(Species, subspecies, .drop = FALSE)
##       Species subspecies  n
## 1      setosa          a 30
## 2      setosa          b 20
## 3      setosa          c  0
## 4      setosa          d  0
## 5      setosa          e  0
## 6  versicolor          a  0
## 7  versicolor          b 10
## 8  versicolor          c 30
## 9  versicolor          d 10
## 10 versicolor          e  0
## 11  virginica          a  0
## 12  virginica          b  0
## 13  virginica          c  0
## 14  virginica          d 20
## 15  virginica          e 30

use return

# Pitfall: the last expression in the function is an assignment, so each call
# yields the assigned value (the vector stored in `table`), not the `cur_out`
# data frame that was being built.
out <- lapply(1:10, function(x){
    cur_out <- data.frame(index = rep(x, 10))
    cur_out$table <- cur_out$index * 1:10
})
out
## [[1]]
##  [1]  1  2  3  4  5  6  7  8  9 10
## 
## [[2]]
##  [1]  2  4  6  8 10 12 14 16 18 20
## 
## [[3]]
##  [1]  3  6  9 12 15 18 21 24 27 30
## 
## [[4]]
##  [1]  4  8 12 16 20 24 28 32 36 40
## 
## [[5]]
##  [1]  5 10 15 20 25 30 35 40 45 50
## 
## [[6]]
##  [1]  6 12 18 24 30 36 42 48 54 60
## 
## [[7]]
##  [1]  7 14 21 28 35 42 49 56 63 70
## 
## [[8]]
##  [1]  8 16 24 32 40 48 56 64 72 80
## 
## [[9]]
##  [1]  9 18 27 36 45 54 63 72 81 90
## 
## [[10]]
##  [1]  10  20  30  40  50  60  70  80  90 100
# Build one multiplication-table data frame per index 1..10.
# Returning the data frame explicitly makes each list element the full table
# (columns `index` and `table`) rather than just the last assigned vector.
out <- lapply(1:10, function(i) {
    tbl <- data.frame(index = rep(i, 10))
    tbl$table <- tbl$index * 1:10
    return(tbl)
})
out
## [[1]]
##    index table
## 1      1     1
## 2      1     2
## 3      1     3
## 4      1     4
## 5      1     5
## 6      1     6
## 7      1     7
## 8      1     8
## 9      1     9
## 10     1    10
## 
## [[2]]
##    index table
## 1      2     2
## 2      2     4
## 3      2     6
## 4      2     8
## 5      2    10
## 6      2    12
## 7      2    14
## 8      2    16
## 9      2    18
## 10     2    20
## 
## [[3]]
##    index table
## 1      3     3
## 2      3     6
## 3      3     9
## 4      3    12
## 5      3    15
## 6      3    18
## 7      3    21
## 8      3    24
## 9      3    27
## 10     3    30
## 
## [[4]]
##    index table
## 1      4     4
## 2      4     8
## 3      4    12
## 4      4    16
## 5      4    20
## 6      4    24
## 7      4    28
## 8      4    32
## 9      4    36
## 10     4    40
## 
## [[5]]
##    index table
## 1      5     5
## 2      5    10
## 3      5    15
## 4      5    20
## 5      5    25
## 6      5    30
## 7      5    35
## 8      5    40
## 9      5    45
## 10     5    50
## 
## [[6]]
##    index table
## 1      6     6
## 2      6    12
## 3      6    18
## 4      6    24
## 5      6    30
## 6      6    36
## 7      6    42
## 8      6    48
## 9      6    54
## 10     6    60
## 
## [[7]]
##    index table
## 1      7     7
## 2      7    14
## 3      7    21
## 4      7    28
## 5      7    35
## 6      7    42
## 7      7    49
## 8      7    56
## 9      7    63
## 10     7    70
## 
## [[8]]
##    index table
## 1      8     8
## 2      8    16
## 3      8    24
## 4      8    32
## 5      8    40
## 6      8    48
## 7      8    56
## 8      8    64
## 9      8    72
## 10     8    80
## 
## [[9]]
##    index table
## 1      9     9
## 2      9    18
## 3      9    27
## 4      9    36
## 5      9    45
## 6      9    54
## 7      9    63
## 8      9    72
## 9      9    81
## 10     9    90
## 
## [[10]]
##    index table
## 1     10    10
## 2     10    20
## 3     10    30
## 4     10    40
## 5     10    50
## 6     10    60
## 7     10    70
## 8     10    80
## 9     10    90
## 10    10   100

Session info

Here are the packages used in this workshop.

sessionInfo()
## R version 4.2.0 (2022-04-22)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Catalina 10.15.7
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.2/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.2/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] R6_2.5.1            BiocParallel_1.31.8 DT_0.23            
##  [4] vroom_1.5.7         data.table_1.14.2   bench_1.1.2        
##  [7] forcats_0.5.1       stringr_1.4.0       dplyr_1.0.9        
## [10] purrr_0.3.4         readr_2.1.2         tidyr_1.2.0        
## [13] tibble_3.1.7        ggplot2_3.3.6       tidyverse_1.3.1    
## 
## loaded via a namespace (and not attached):
##  [1] lubridate_1.8.0   assertthat_0.2.1  digest_0.6.29     utf8_1.2.2       
##  [5] cellranger_1.1.0  backports_1.4.1   reprex_2.0.1      evaluate_0.15    
##  [9] httr_1.4.3        highr_0.9         pillar_1.7.0      rlang_1.0.2      
## [13] readxl_1.4.0      rstudioapi_0.13   jquerylib_0.1.4   rmarkdown_2.14   
## [17] labeling_0.4.2    htmlwidgets_1.5.4 bit_4.0.4         munsell_0.5.0    
## [21] broom_0.8.0       compiler_4.2.0    modelr_0.1.8      xfun_0.31        
## [25] pkgconfig_2.0.3   htmltools_0.5.2   tidyselect_1.1.2  codetools_0.2-18 
## [29] fansi_1.0.3       crayon_1.5.1      tzdb_0.3.0        dbplyr_2.2.0     
## [33] withr_2.5.0       grid_4.2.0        jsonlite_1.8.0    gtable_0.3.0     
## [37] lifecycle_1.0.1   DBI_1.1.2         magrittr_2.0.3    scales_1.2.0     
## [41] profmem_0.6.0     cli_3.3.0         stringi_1.7.6     farver_2.1.0     
## [45] fs_1.5.2          xml2_1.3.3        bslib_0.3.1       ellipsis_0.3.2   
## [49] generics_0.1.2    vctrs_0.4.1       tools_4.2.0       bit64_4.0.5      
## [53] glue_1.6.2        crosstalk_1.2.0   hms_1.1.1         parallel_4.2.0   
## [57] fastmap_1.1.0     yaml_2.3.5        colorspace_2.0-3  rvest_1.0.2      
## [61] knitr_1.39        haven_2.5.0       sass_0.4.1